Subsetting States
Wastewater Data from Biobot Analytics
biobot_link <- "https://raw.githubusercontent.com/biobotanalytics/covid19-wastewater-data/master/wastewater_by_county.csv"
w_data <- read_csv(biobot_link)%>%
filter(sampling_week >= ymd("2021-03-01") & sampling_week <= ymd("2022-03-01")) %>%
mutate(fips = factor(fipscode)) %>%
select(-fipscode)
# counties with the most wastewater data
w_data %>%
group_by(name,fips) %>%
summarize(n=n()) %>%
arrange(desc(n))# states with the most wastewater data
w_data %>%
group_by(name,fips,state) %>%
summarize(n=n()) %>%
group_by(state) %>%
summarize(obs = sum(n)) %>%
arrange(desc(obs))####################################################################
# COMPARE WASTEWATER TO CORRECTED CASES
####################################################################
state <- "ma"
priors_versions <- c("v2", "v6", "v7", "v10")
state_corrected_path <- paste0(here::here(), "/data/adj_cases_output_biweekly/", state, "/")
versions <- tibble(
version = c("v2", "v6", "v7", "v8","v9", "v10"),
vlabel = c("Original",
"Distribution about Smoothed Empirical Beta",
"Distribution about Smoothed Empirical Beta\n and Distribution about Smoothed Empirical P(S|untested)",
"Distribution about Smoothed Empirical Beta \n and Smoothing County-level Test Positivity Rate",
"Distribution about Smoothed Empirical Beta \n and Smoothing County-level Test Positivity Rate\n and Updated Screening Test Positivity",
"Distribution about Smoothed Empirical P(S|untested)"))
versions <- versions %>%
mutate(vlabel= gsub("Distribution about", "", vlabel)) %>%
mutate(vlabel = gsub("Empirical", "Emp.", vlabel))%>%
mutate(vlabel = factor(vlabel, levels = .$vlabel))
################################
# ESTIMATED
################################
dates <- readRDS(paste0(here::here(), "/data/date_to_biweek.RDS"))
corrected <- map_df(priors_versions, ~readRDS(
paste0(state_corrected_path, "adj_cases_",
.x,
".RDS")) %>%
mutate(version = .x)) %>%
rename(biweek = week) %>%
left_join(dates)
corrected <- corrected %>%
left_join(versions)
pal <- c("#74A09F", "#A0748B",
"#748BA0", "#A08974",
"#D49E9F", "#D4B89E", "#AFCFE5")compare_county_wastewater <- function(county_fips, end_date ="2021-12-15") {
county_name <- w_data %>%
filter(fips == county_fips) %>%
pull(name)
custom_title = paste0("Comparing Wastewater Concentration to Corrected Estimates for ",
county_name, "\nFIPS: ", county_fips)
end_date <- ymd(end_date)
w_data_for_county <- w_data %>%
rename(date = sampling_week) %>%
filter(fips == county_fips & date <= end_date)
if(nrow(w_data_for_county) == 0) {
message(paste0("No wastewater data for FIPS: ",
county_fips));
return()}
begin_date <- min(w_data_for_county$date)
adj <- corrected %>%filter(fips == county_fips & date <= end_date) %>% pull(exp_cases_ub) %>% max()
conc_max <- max(w_data_for_county$effective_concentration_rolling_average)
adj <- conc_max/adj
corrected %>%
filter(fips == county_fips & date >= begin_date & date <= end_date) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb*adj,
ymax = exp_cases_ub*adj,
fill = vlabel),
alpha = .7) +
geom_line(data = w_data_for_county,
aes(x = date, y =effective_concentration_rolling_average ),
color = "#DB4048",
size = 1.1) +
geom_point(data = w_data_for_county,
aes(x = date, y =effective_concentration_rolling_average ),
color = "#DB4048",
alpha = .5,
size = 1.2) +
facet_wrap(~vlabel) +
scale_fill_manual(values = pal) +
theme_bw() +
theme(
legend.position = "none",
plot.title = element_text(face = "bold", size = 16, hjust = .5),
axis.title = element_text(size = 18),
strip.text = element_text(size = 14)
)+
scale_y_continuous(sec.axis = sec_axis(~./adj,
name = "Corrected Infection Estimates",
labels = comma),
labels = comma) +
labs(y = "Effective Concentration Rolling Average",
title = custom_title) +
scale_x_date(date_labels = "%b %Y")
}
# compare_county_wastewater(county_fips = "25023")Compare Wastewater Trends to Corrected Estimates
walk(unique(corrected$fips), ~{
plt <- compare_county_wastewater(county_fips = .x)
print(plt)
} ) ## No wastewater data for FIPS: 25007,25019
## NULL
## No wastewater data for FIPS: 25011
## NULL
## No wastewater data for FIPS: 25013
## NULL
## No wastewater data for FIPS: 25021
## NULL
library(scales)
end_date <- ymd("2021-12-15")
w_data_for_county <- w_data %>%
rename(date = sampling_week) %>%
filter(fips == "25023" & date <= end_date)
begin_date <- min(w_data_for_county$date)
adj <- corrected %>%filter(fips == "25023" & date <= end_date) %>% pull(exp_cases_ub) %>% max()
conc_max <- max(w_data_for_county$effective_concentration_rolling_average)
adj <- conc_max/adj
corrected %>%
filter(fips == "25023" & date >= begin_date & date <= end_date) %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb*adj,
ymax = exp_cases_ub*adj,
fill = vlabel),
alpha = .7) +
geom_line(data = w_data_for_county,
aes(x = date, y =effective_concentration_rolling_average ),
color = "#DB4048",
size = 1.1) +
facet_wrap(~vlabel) +
scale_fill_manual(values = pal) +
theme_bw() +
theme(
legend.position = "none"
)+
scale_y_continuous(sec.axis = sec_axis(~./adj,
name = "Corrected Infection Estimates",
labels = comma),
labels = comma) +
labs(y = "Effective Concentration Rolling Average") +
theme(axis.title = element_text(size = 18))
adj <- conc_max/adj
w_data %>%
filter(fips == "25023") %>%
select(date = sampling_week,
fips = fips,
value = effective_concentration_rolling_average) %>%
mutate(source = "concentration") %>%
bind_rows(all_25005) %>%
filter(date <= ymd("2021-12-15")) %>%
ggplot(aes(x=date,y=value,color=source)) +
geom_line()
corrected %>%
filter(fips == "25023") %>%
left_join(dates) %>%
ggplot() +
geom_line(data = aes(x = ))
# look at counties in MA
w_data %>%
filter(state == "MA"& sampling_week <= ymd("2021-12-15")) %>%
ggplot(aes(x = sampling_week, y = effective_concentration_rolling_average)) +
geom_line() +
geom_point(size =.5, alpha = .5) +
geom_line() +
facet_wrap(~fips)
all_ma <- readRDS(paste0(here::here(), "/data/ma/ma_full_county.RDS")) %>%
filter(start_date >= ymd("2021-03-01") & start_date <= ymd("2022-03-01"))
all_ma %>%
filter(fips == "25005") %>%
ggplot(aes(x = start_date, y = positive)) +
geom_point()
w_data_for_county <- w_data %>%
filter(fips == "25005" & date <= ymd("2021-12-15"))
w_data %>%
filter(fips == "25005") %>%
select(date = sampling_week,
fips = fips,
value = effective_concentration_rolling_average) %>%
mutate(source = "concentration") %>%
bind_rows(all_25005) %>%
filter(date <= ymd("2021-12-15")) %>%
ggplot(aes(x=date,y=value,color=source)) +
geom_line()
w_data %>%
filter(state == "MA") %>% pull(fips) %>% unique() %>% length()
adj <- all_ma %>%
filter(fips == "25005" & start_date <= ymd("2021-12-15" )) %>%
pull(positive) %>% max()
conc_max <- w_data %>%
filter(fips == "25005" & sampling_week <= ymd("2021-12-15" )) %>%
pull(effective_concentration_rolling_average) %>%
max()
adj <- conc_max/adj
w_data %>%
filter(state == "MA"& sampling_week <= ymd("2021-12-15") & fips == "25005") %>%
ggplot() +
geom_line(aes(x = sampling_week, y = effective_concentration_rolling_average)) +
geom_line(aes(x = start_date, y = positive/adj)) +
geom_point(size =.5, alpha = .5) +
geom_line() +
facet_wrap(~fips)
w_data %>%
filter(state == "MA"& sampling_week <= ymd("2021-12-15") & fips == "25005") %>%
left_join(all_ma[all_ma$fips == "25005" & all_ma$start_date <= ymd("2021-12-15"),]) %>%
mutate(positive_adj = positive / adj) %>% View()
all_25005 <- all_ma %>%
filter(fips == "25005") %>%
mutate(positive = positive*adj) %>%
select(date = start_date, fips = fips, value=positive) %>%
mutate(source = "cases")
all_25005 %>%
ggplot(aes(x = date, y = value)) +
geom_line()
w_data %>%
filter(fips == "25005") %>%
select(date = sampling_week,
fips = fips,
value = effective_concentration_rolling_average) %>%
ggplot(aes(x = date, y = value)) +
geom_line()
w_data %>%
filter(fips == "25005") %>%
select(date = sampling_week,
fips = fips,
value = effective_concentration_rolling_average) %>%
mutate(source = "concentration") %>%
bind_rows(all_25005) %>%
filter(date <= ymd("2021-12-15")) %>%
ggplot(aes(x=date,y=value,color=source)) +
geom_line()screening_data_link <- paste0(
"https://api.covidcast.cmu.edu/epidata/covidcast/?data_source=fb-survey",
"&signal=smoothed_wscreening_tested_positive_14d,smoothed_wtested_positive_14d",
"&geo_type=state&time_type=day&time_values=20210320-20221212&geo_value=",
state_name)
fb_screening <- httr::GET(screening_data_link)
fb_symptoms <-jsonlite::fromJSON(
httr::content(fb_screening,
as = "text",
encoding = "UTF-8"))$epidata %>%
mutate(date = lubridate::ymd(time_value),
week = lubridate::week(date),
state = geo_value,
value = value/100,
stderr = stderr/100) %>%
filter(date <= lubridate::ymd("2022-03-01")) %>%
as_tibble()